
// A2_crosswalk_pc8
//==============================================================================

// Description: This code crosswalks the data on the number of firms from the
// NACE classification to the PRODCOM product classification (pc8).

// Working directory: every relative path used below is resolved from here
cd "D:\data_replication"

// Start with no data in memory
clear


// Load the dataset on trade and production by pc8
//==============================================================================

use estimation\1_data_format\data_summary_1_2007.dta, clear


// Match companies by nace_r1 to import file
//==============================================================================
// companies_nace.dta is looked up twice for every observation: first with the
// 3-character prefix of prccode, then with the 4-character prefix. A 3-digit
// match takes precedence; the 4-digit count only fills observations that are
// still missing afterwards.

// 3-Digit Codes
//--------------

gen nace_r1 = substr(prccode, 1, 3)
destring nace_r1, replace
merge m:1 nace_r1 declarant using data\number_firms\companies_nace.dta
drop if _merge == 2																// keep only codes present in the trade data
drop _merge
rename companies companies_final												// rename keeps the storage type of the count
rename nace_r1 nace_3d															// kept: needed as grouping key for the shares
gen byte matched_3d = !missing(companies_final)									// 1 = firm count stems from the 3-digit lookup

// 4-Digit Codes
//--------------

gen nace_r1 = substr(prccode, 1, 4)
destring nace_r1, replace
merge m:1 nace_r1 declarant using data\number_firms\companies_nace.dta
drop if _merge == 2
drop _merge
replace companies_final = companies if missing(companies_final)				// missing() also catches extended missings (.a-.z)
drop companies
rename companies_final companies


// Assign companies by Nace 1 to pc8
//==============================================================================
// Each firm count is split across the pc8 codes of its NACE group in
// proportion to import value. The denominator has to be the import total of
// the group the count was matched at: using the 4-digit total for a count
// matched at 3 digits would make the shares within that 3-digit group sum to
// the number of 4-digit subgroups instead of 1, i.e. the same firms would be
// allocated several times.
// NOTE(review): the totals pool all rows of a declarant regardless of year and
// a single row per declarant-product is kept below; presumably the input holds
// one year only -- confirm.

// total() is the documented name of the legacy egen function sum(); double
// avoids float rounding when adding up large import values
bysort declarant nace_3d: egen double tot_value_3d = total(imports_value_pc8)
bysort declarant nace_r1: egen double tot_value_4d = total(imports_value_pc8)
gen double tot_value_nace = cond(matched_3d, tot_value_3d, tot_value_4d)

sort declarant nace_r1 prccode
by declarant nace_r1 prccode: egen double tot_value_pc8 = total(imports_value_pc8)
by declarant nace_r1 prccode: keep if _n == 1									// one row per declarant-product
gen share = tot_value_pc8 / tot_value_nace

gen companies_pc8 = share * companies

keep declarant prccode year companies_pc8
order declarant prccode year companies_pc8

// Fallback of one firm where no count could be assigned (no NACE match, or a
// group without any imports) or where the product's share is zero.
// NOTE(review): the original script reported 6.6% (missing) and 5.1% (zero) of
// observations here; those figures predate the denominator fix -- re-check.
replace companies_pc8 = 1 if missing(companies_pc8)
replace companies_pc8 = 1 if companies_pc8 == 0

save data\number_firms\companies_pc8.dta, replace



